<?php
/**
 * 抓取豆瓣租房信息
 * 引用包 requests https://github.com/rmccue/Requests
 */

// Bootstrap: report only fatal errors, warnings and parse errors, then load the
// Requests HTTP client and the Simple HTML DOM parser from paths relative to
// this script / the include path.
error_reporting(E_PARSE | E_WARNING | E_ERROR);
require 'requests/library/Requests.php';
require 'simple_html_dom.php';
// Register the Requests library's autoloader.
Requests::register_autoloader();

// Configuration
$url = "http://www.douban.com/group/shanghaizufang/discussion?start=0"; // listing page to scrape (start=0)
$container = []; // scraped topics: list of ['id', 'text', 'pub_time', 'url'] arrays

// Fetch the listing page and parse it into a DOM.
$body = Requests::get($url, [], ['connect_timeout' => 100, 'timeout' => 100])->body;
$html = str_get_html($body);

// str_get_html() returns false when it cannot build a DOM (e.g. empty body);
// treat that as "no topics" instead of calling find() on a non-object.
$a = $html ? $html->find('td.title a') : [];
foreach ($a as $item) {
	// The topic id is taken from the sixth "/"-separated segment of the link
	// (presumably http://www.douban.com/group/topic/<id>/ — confirm against
	// live markup). Links without a numeric id there are skipped, because the
	// id is later written to the database as a number.
	$segments = explode('/', $item->href);
	if (!isset($segments[5]) || !ctype_digit($segments[5])) {
		continue;
	}

	// The publication time is read from the 4th child of the link's
	// grandparent node (the table row); guard each hop against missing nodes.
	$cell = $item->parent();
	$row = $cell ? $cell->parent() : null;
	$timeCell = $row ? $row->children(3) : null;

	$container[] = [
		'id' => $segments[5],
		'text' => $item->title,
		'pub_time' => $timeCell ? $timeCell->plaintext : '',
		'url' => $item->href,
	];

	// Per-topic image scraping (fetching each topic URL and collecting the
	// src of every '.topic-figure img') is currently disabled, so entries
	// carry no 'images' key.
}

// NOTE(review): debug dump. The exit below stops the script here, so the
// database section that follows never runs while this line is present.
print_r($container);exit;
// Persist the scraped topics to MySQL.
//
// Uses mysqli with prepared statements: the legacy mysql_* functions no longer
// exist as of PHP 7, and the scraped titles/URLs are untrusted text that must
// not be concatenated into SQL.
mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT); // make every mysqli failure throw
try {
	$db = new mysqli('192.168.1.66', 'root', '111', 'zufang');
	$db->set_charset('utf8');

	if (!empty($container)) {
		// Group all writes so a failure part-way through leaves nothing behind
		// (only effective on transactional storage engines).
		$db->autocommit(false);

		$houseStmt = $db->prepare('insert into house_info (`title_id`, `title`, `url`, `pub_time`) values (?, ?, ?, ?)');
		$imageStmt = $db->prepare('insert into imgs (`title_id`, `img_url`) values (?, ?)');

		// bind_param() binds by reference: bind once, then reassign the
		// variables before each execute(). Ids are bound as strings so large
		// values are not truncated by PHP's integer size; MySQL converts them.
		$houseStmt->bind_param('ssss', $titleId, $title, $topicUrl, $pubTime);
		$imageStmt->bind_param('ss', $titleId, $imageUrl);

		foreach ($container as $value) {
			$titleId = (string) $value['id'];
			$title = (string) $value['text'];
			$topicUrl = (string) $value['url'];
			$pubTime = (string) $value['pub_time'];
			$houseStmt->execute();

			// 'images' is only present when image scraping is enabled;
			// a topic without it simply has no image rows.
			$images = (isset($value['images']) && is_array($value['images'])) ? $value['images'] : [];
			foreach ($images as $image) {
				$imageUrl = (string) $image;
				$imageStmt->execute();
			}
		}

		$houseStmt->close();
		$imageStmt->close();

		// Remove duplicates: for rows sharing a title_id, keep the one with the lowest id.
		$db->query('delete a from house_info a, house_info b where a.title_id = b.title_id and a.id > b.id');

		$db->commit();
	}

	$db->close();
} catch (mysqli_sql_exception $e) {
	// Uncommitted work is discarded by the server when the connection closes on exit.
	print_r($e->getMessage());exit(1);
}
